Requirements

library(tidyverse)
library(plyr)
library(Rfast)
library(tryCatchLog)
library(lmerTest)
library(car)
library(viridis)
library(gridExtra)
library(kableExtra)
library(doParallel)
library(doSNOW)
library(gghalves)
library(ggdist)
library(ggpubr)
library(grid)

Datasets

Sample metadata

metadata <- read.csv("src/metadata.csv")
kbl(metadata) %>% kable_styling()
sample.id parent phenotype individual lineage
T3-AD_A2C D unresponsive A2C A
T3-AQ_A2C Q unresponsive A2C A
T3-AD_A7C D unresponsive A7C A
T3-AQ_A7C Q unresponsive A7C A
T3-AD_A9C D unresponsive A9C A
T3-AQ_A9C Q unresponsive A9C A
T2-AD_A3T D responsive A3T A
T2-AQ_A3T Q responsive A3T A
T2-AD_A1T D responsive A1T A
T2-AQ_A1T Q responsive A1T A
T3-AD_A1T D responsive A1T A
T3-AQ_A1T Q responsive A1T A
T2-BD_B6C D unresponsive B6C B
T2-BQ_B6C Q unresponsive B6C B
T2-BD_B2C D unresponsive B2C B
T2-BQ_B2C Q unresponsive B2C B
T2-BD_B5C D unresponsive B5C B
T2-BQ_B5C Q unresponsive B5C B
T2-BD_B3T D responsive B3T B
T2-BQ_B3T Q responsive B3T B
T2-BD_B2T D responsive B2T B
T2-BQ_B2T Q responsive B2T B
T2-BD_B8T D responsive B8T B
T2-BQ_B8T Q responsive B8T B

Generate SNP:gene count matrices for each dataset

Transcription

# Load expression matrix
SNP_counts <- read.table("results/coverage_matrix_2022-08-03.txt",
                         sep="\t",header=T)
# Load SNP:gene table
SNPs <- SNP_counts[,c(1:5)]
# Load list of lincRNA transcript IDs
lincRNAs <- data.frame(Transcript=unique(
  read.table("results/lincRNA_txpts.txt")[,1]))
## Generate pseudo GeneIDs for lincRNA transcripts
lincRNAs$Gene <- paste0("GeneID:lincRNA_",
                        seq.int(1,length(lincRNAs$Transcript),1))
SNPs <- SNPs %>% left_join(lincRNAs,by="Transcript") %>% 
  mutate(Gene=coalesce(Gene.y,Gene.x)) %>%
  select(-Gene.x,-Gene.y)
SNPs <- SNPs[,c(1,5,2,3,4)]
rm(lincRNAs)
# Get transcript IDs not associated with GeneIDs
SNPs_gene <- SNPs %>% filter(str_detect(Gene, "Gene"))
SNPs_noGene <- data.frame(Gene=unique(SNPs[!SNPs$Transcript%in%
                                             SNPs_gene$Transcript,"Gene"]))
rm(SNPs_gene)
## Generate pseudo GeneIDs for novel genes
SNPs_noGene$newGeneID <- paste0("GeneID:novel_",seq.int(
  1,length(SNPs_noGene$Gene),1))
SNPs <- SNPs %>% left_join(SNPs_noGene,by="Gene") %>% 
  mutate(Gene=coalesce(newGeneID,Gene)) %>%
  select(-newGeneID)
rm(SNPs_noGene)
# Generate SNP IDs based on Chromosome and Genomic_pos
SNPs$SNP_pos <- paste(SNPs$Chromosome,SNPs$Genomic_pos,sep=":")
SNP.list <- data.frame(SNP_pos=unique(SNPs$SNP_pos),
                       SNP=c(1:length(unique(SNPs$SNP_pos))))
SNP.list$SNP <- paste("snp",SNP.list$SNP,sep="_")
SNPs <- SNPs %>% left_join(SNP.list,by="SNP_pos")
rm(SNP.list)
# Simplify transcript IDs
tx.list <- data.frame(Transcript=unique(SNP_counts$Transcript),
                      tx.ID=c(1:length(unique(SNP_counts$Transcript))))
tx.list$tx.ID <- paste("tx",tx.list$tx.ID,sep="_")
SNPs <- SNPs %>% left_join(tx.list,by="Transcript")
rm(tx.list)
# Combine GeneIDs and transcript IDs
SNPs$Gene <- paste(sapply(strsplit(SNPs$Gene, split = ":"), "[", 2 ),
                   SNPs$tx.ID,sep=".")
# Combine new GeneIDs with SNP IDs
SNPs$Gene <- paste(SNPs$SNP,SNPs$Gene,sep=":")
# Replace count matrix GeneIDs with new GeneIDs
SNP_counts$Gene <- SNPs$Gene
rm(SNPs)
# Clean up
row.names(SNP_counts) <- SNP_counts$Gene
write.csv(SNP_counts[,c(1:5)],"results/SNP_gene_tx_IDs.csv")
SNP_counts[,c(1:5)] <- NULL
SNP_counts[is.na(SNP_counts)] <- 0
names(SNP_counts) <- gsub("[.]", "-", names(SNP_counts))
# Save count matrix to file
write.csv(SNP_counts,"results/EHBxAHB_PS_expression.csv")

RNA m6A

# Load RNA m6A probability matrix
SNP_m6A <- read.table("results/probM_matrix_2022-08-03.txt",sep="\t",
                      header=T)[-c(1:5)]
# Rows and columns are the same as SNP_counts
row.names(SNP_m6A) <- row.names(SNP_counts)
names(SNP_m6A) <- names(SNP_counts)
# Clean up
SNP_m6A[is.na(SNP_m6A)] <- 0
# Write RNA m6A probability matrix to file
write.csv(SNP_m6A,"results/EHBxAHB_PS_m6A.csv")

Parent-of-origin allele-specific transcription

Preprocess count matrices

Split count matrices by phenotype

unresponsive.IDs <- metadata[metadata$phenotype=="unresponsive","sample.id"]
unresponsive_counts <- SNP_counts[,names(SNP_counts)%in%unresponsive.IDs]

responsive.IDs <- metadata[metadata$phenotype=="responsive","sample.id"]
responsive_counts <- SNP_counts[,names(SNP_counts)%in%responsive.IDs]

Filter low-count SNPs

# Function to filter SNPs
filter.SNPs <- function(counts,metadata,lcf){
  # Remove rows with < lcf counts counts by cross
  LA <- metadata[metadata$lineage=="A","sample.id"]
  LB <- metadata[metadata$lineage=="B","sample.id"]
  counts <- counts[rowSums(counts[,names(counts)%in%LA])>lcf,]
  counts <- counts[rowSums(counts[,names(counts)%in%LB])>lcf,]
  # Remove rows with greater than 10000 counts (cannot run SK test on these)
  counts$SUM <- rowSums(counts)
  counts <- counts[counts$SUM<10000,]
  counts$SUM <- NULL
  # Remove rows with duplicate counts and with < 2 SNPs
  counts$gene <- as.character(map(strsplit(row.names(counts), split = ":"), 2))
  genelist <- unique(counts$gene)
  delete.rows <- list()
  for(i in 1:length(genelist)){
    tmp <- counts[counts$gene==genelist[i],]
    tmp <- tmp[!duplicated(tmp),]
    if(length(row.names(tmp))<2){append(delete.rows,genelist[i])}
  }
  counts <- counts[!counts$gene%in%unlist(delete.rows),]
  counts$gene <- NULL
  # Return filtered counts
  return(counts)
}
unresponsive_counts <- filter.SNPs(unresponsive_counts,metadata,9)
write.csv(unresponsive_counts,"results/EHBxAHB_unresponsive_counts.csv")
responsive_counts <- filter.SNPs(responsive_counts,metadata,9)
write.csv(responsive_counts,"results/EHBxAHB_responsive_counts.csv")

Conduct statistical tests

Conduct Storer-Kim test at each tx:SNP

# Storer-Kim test
## (Function from the WRS2 package). Test the hypothesis that two independent binomials have equal probability of success.
## Modified for efficiency: changed outer() command to Rfast::Outer()
twobinom<-function(r1=sum(elimna(x)),n1=length(x),
                   r2=sum(elimna(y)),n2=length(y),
                   x=NA,y=NA,alpha=.05){
  n1p<-n1+1
  n2p<-n2+1
  n1m<-n1-1
  n2m<-n2-1
  q <- r1/n1
  p <- r2/n2
  if(is.na(q)){q <- 0}
  if(is.na(p)){p <- 0}
  chk<-abs(q-p)
  x<-c(0:n1)/n1
  suppressWarnings(if(is.na(x)){x <- 0})
  y<-c(0:n2)/n2
  suppressWarnings(if(is.na(y)){y <- 0})
  phat<-(r1+r2)/(n1+n2)
  m1<-t(Outer(x,y,"-"))
  m2<-matrix(1,n1p,n2p)
  flag<-(abs(m1)>=chk)
  m3<-m2*flag
  rm(m1,m2,flag)
  xv<-c(1:n1)
  yv<-c(1:n2)
  xv1<-n1-xv+1
  yv1<-n2-yv+1
  dis1<-c(1,pbeta(phat,xv,xv1))
  dis2<-c(1,pbeta(phat,yv,yv1))
  pd1<-NA
  pd2<-NA
  for(i in 1:n1){pd1[i]<-dis1[i]-dis1[i+1]}
  for(i in 1:n2){pd2[i]<-dis2[i]-dis2[i+1]}
  pd1[n1p]<-phat^n1
  pd2[n2p]<-phat^n2
  m4<-t(Outer(pd1,pd2,"*"))
  test<-sum(m3*m4)
  rm(m3,m4)
  list(p.value=test,p1=q,p2=p,est.dif=q-p)
}
# Wrapper for Storer-Kim test
PSGE.SK <- function(counts,metadata,phenotype,cores){
  pat.exp <- counts[,metadata[metadata$parent%in%c("D")&
                                metadata$phenotype==phenotype,"sample.id"]]
  mat.exp <- counts[,metadata[metadata$parent%in%c("Q")&
                                metadata$phenotype==phenotype,"sample.id"]]
  registerDoParallel(cores=cores)
  i.len=length(row.names(pat.exp))
  writeLines(c(""), "SKlog.txt")
  sink("SKlog.txt", append=TRUE)
  return.df <- foreach(i=1:i.len, .combine=rbind, 
                       .export=ls(globalenv()),.packages="Rfast") %dopar% {
    cat(paste("Row ",paste(paste0(paste0(i," of "),i.len),Sys.time()," "),"\n"))
    SNP_gene=row.names(pat.exp[i,])
    p1.s=sum(pat.exp[i,])
    p2.s=sum(mat.exp[i,])
    p.o=sum(p1.s,p2.s)
    test=twobinom(r1=p1.s,n1=p.o,r2=p2.s,n2=p.o)$p.value
    return.append=data.frame(SNP_gene=SNP_gene,p=test)
    return(return.append)
  }
  sink()
  return.df=return.df[match(row.names(pat.exp), return.df$SNP_gene),]
  return(return.df)
}
unresponsive.SK <- PSGE.SK(unresponsive_counts,metadata,"unresponsive",2)
write.csv(unresponsive.SK,"results/EHBxAHB_unresponsiveSK.csv", row.names=F)

responsive.SK <- PSGE.SK(responsive_counts,metadata,"responsive",2)
write.csv(responsive.SK,"results/EHBxAHB_responsiveSK.csv", row.names=F)

Conduct GLIMMIX test for each transcript

# General linear mixed effects model
## Requires tryCatchLog package!
PSGE.GLIMMIX <- function(counts,metadata,cores){
  counts$SNP_gene <- row.names(counts)
  counts$geneID <- as.character(unlist(map(strsplit(counts$SNP_gene, 
                                                    split = ":"), 2)))
  genelist <- unique(counts$geneID)
  registerDoParallel(cores=cores)
  writeLines(c(""), "GLIMMIXlog.txt")
  i.len <- length(genelist)
  sink("GLIMMIXlog.txt", append=TRUE)
  df.out <- foreach(i=1:i.len,.combine=rbind) %dopar% {
    cat(paste("Row ",paste(paste0(paste0(i," of "),i.len),Sys.time()," "),"\n"))
    counts.sub <- counts[counts$geneID==genelist[i],]
    counts.sub$geneID <- NULL
    counts.sub <- gather(counts.sub, sample.id, count, 
                         names(counts.sub), -SNP_gene, factor_key=TRUE)
    counts.sub <- join(counts.sub, metadata, by = "sample.id")
    counts.sub$parent <- as.factor(str_sub(counts.sub$parent,-1,-1))
    counts.sub$SNP_gene <- as.factor(counts.sub$SNP_gene)
    counts.sub$lineage <- as.factor(counts.sub$lineage)
    counts.sub$individual <- as.factor(counts.sub$individual)
    testfail <- F
    test <- "null"
    tryCatchLog(test <- lmer(count~parent+lineage+parent*lineage+
                               (1|SNP_gene)+(1|individual),data=counts.sub), 
                error = function(e) {testfail <- T})
    if(class(test)=="character"){testfail <- T}
    if(testfail==F){
      test <- summary(test)
      parent.p.list <- test[["coefficients"]][2,5]
      cross.p.list <- test[["coefficients"]][3,5]
      parent.cross.p.list <- test[["coefficients"]][4,5]
    }else{
      parent.p.list <- 1
      cross.p.list <- 1
      parent.cross.p.list <- 1
    }
    return(data.frame(ID=genelist[i],
                      parent.p=parent.p.list,
                      cross.p=cross.p.list,
                      parentXcross.p=parent.cross.p.list))
  }
  sink()
  return(df.out)
}
unresponsive.GLIMMIX <- PSGE.GLIMMIX(unresponsive_counts,metadata,2)
write.csv(unresponsive.GLIMMIX,"results/EHBxAHB_unresponsiveGLIMMIX.csv", 
          row.names=F)

responsive.GLIMMIX <- PSGE.GLIMMIX(responsive_counts,metadata,2)
write.csv(responsive.GLIMMIX,"results/EHBxAHB_responsiveGLIMMIX.csv", 
          row.names=F)

Assess test results for each gene

PSGE.analysis <- function(counts,phenotype,metadata,SK,GLIMMIX){
  # Split count matrices by cross and parent of origin for plotting
  p1.pat <- counts[,metadata[metadata$parent%in%c("D")&
                               metadata$lineage=="B"&
                               metadata$phenotype==phenotype,"sample.id"]]
  p1.mat <- counts[,metadata[metadata$parent%in%c("Q")&
                               metadata$lineage=="B"&
                               metadata$phenotype==phenotype,"sample.id"]]
  p2.pat <- counts[,metadata[metadata$parent%in%c("D")&
                               metadata$lineage=="A"&
                               metadata$phenotype==phenotype,"sample.id"]]
  p2.mat <- counts[,metadata[metadata$parent%in%c("Q")&
                               metadata$lineage=="A"&
                               metadata$phenotype==phenotype,"sample.id"]]
  # Set up a data.frame to plot %p1 and %p2 for each SNP
  p1.plot <- data.frame(rowSums(p1.pat)/(rowSums(p1.mat)+rowSums(p1.pat)))
  names(p1.plot) <- c("p1")
  p1.plot[is.nan(p1.plot$p1),"p1"] <- 0
  p2.plot <- data.frame(rowSums(p2.mat)/(rowSums(p2.mat)+rowSums(p2.pat)))
  names(p2.plot) <- c("p2")
  p2.plot[is.nan(p2.plot$p2),"p2"] <- 0
  plot <- cbind(p1.plot,p2.plot)
  # Join results of Storer-Kim tests
  plot <- plot[row.names(plot)%in%SK$SNP_gene,]
  plot$SK.p <- SK$p
  plot$SNP_gene <- row.names(plot)
  plot$gene <- as.character(map(strsplit(plot$SNP_gene, split = ":"), 2))
  # Prep GLIMMIX results for downstream analyses
  GLIMMIX.biased <- data.frame(gene=GLIMMIX$ID,parent.p=GLIMMIX$parent.p,
                               cross.p=GLIMMIX$cross.p,parentXcross.p=GLIMMIX$
                                 parentXcross.p)
  # Correct for multiple testing
  plot$SK.padj <- p.adjust(plot$SK.p,"BH")
  plot$bias <- "NA"
  GLIMMIX$parent.padj <- p.adjust(GLIMMIX$parent.p,"BH")
  GLIMMIX$cross.padj <- p.adjust(GLIMMIX$cross.p,"BH")
  GLIMMIX$parentXcross.padj <- p.adjust(GLIMMIX$parentXcross.p,"BH")
  GLIMMIX.biased <- GLIMMIX[GLIMMIX$parent.padj<0.05|GLIMMIX$cross.padj<0.05,1]
  GLIMMIX.biased <- setdiff(GLIMMIX.biased,GLIMMIX[GLIMMIX$
                                                     parentXcross.padj<0.05,1])
  # For each gene, check whether all SNPs are biased in the same direction
  for(i in 1:length(row.names(plot))){
    p <- plot[i,"SK.padj"]
    p1 <- plot[i,"p1"]
    p2 <- plot[i,"p2"]
    if(p<0.05&p1>0.6&p2<0.4){plot[i,"bias"] <- "pat"}
    if(p<0.05&p1<0.4&p2>0.6){plot[i,"bias"] <- "mat"}
    if(p<0.05&p1<0.4&p2<0.4){plot[i,"bias"] <- "EHB"}
    if(p<0.05&p1>0.6&p2>0.6){plot[i,"bias"] <- "AHB"}
  }
  biaslist <- data.frame(matrix(ncol=2,nrow=0))
  names(biaslist) <- c("gene","bias")
  genelist <- unique(plot$gene)
  for(i in 1:length(genelist)){
    tmp <- unique(plot[plot$gene==genelist[i],"bias"])
    if(length(tmp)>1){
      if(length(tmp)==2){
        if(any(tmp%in%"NA")){
          bias <- tmp[!tmp%in%"NA"]
        }else{bias <- "NA"}
      }else{
        bias <- "NA"
      }
    }else{bias <- tmp}
    biaslist <- rbind(biaslist,data.frame(gene=genelist[[i]], bias=bias))
  }
  plot <- plot %>% left_join(biaslist, by = c('gene' = 'gene')) 
  names(plot)[c(7:8)] <- c("xbias","bias")
  plot$bias.plot <- "NA"
  for(i in 1:length(row.names(plot))){
    p1 <- plot$p1[i]
    p2 <- plot$p2[i]
    bias <- plot$bias[i]
    if(!bias=="NA"){
      if(bias=="pat"){if(p1>0.6&p2<0.4){plot[i,"bias.plot"]<- "pat"}}
      if(bias=="mat"){if(p1<0.4&p2>0.6){plot[i,"bias.plot"] <- "mat"}}
      if(bias=="EHB"){if(p1<0.4&p2<0.4){plot[i,"bias.plot"] <- "EHB"}}
      if(bias=="AHB"){if(p1>0.6&p2>0.6){plot[i,"bias.plot"] <- "AHB"}}
    }
  }
  plot[!plot$gene%in%GLIMMIX.biased,"bias.plot"] <- "NA" 
  plot <- rbind(plot[plot$bias.plot%in%c("NA"),],
                plot[plot$bias.plot%in%c("mat", "AHB", "EHB", "pat"),])
  plot$bias.plot <- factor(plot$bias.plot,
                           levels = c("NA","mat", "AHB", "EHB", "pat"))
  return(plot)
}
unresponsive.plot <- PSGE.analysis(unresponsive_counts,"unresponsive",metadata,
                                   unresponsive.SK,unresponsive.GLIMMIX)
write.csv(unresponsive.plot,"results/EHBxAHB_unresponsive_PSGE.csv",row.names=F)

responsive.plot <- PSGE.analysis(responsive_counts,"responsive",metadata,
                                 responsive.SK,responsive.GLIMMIX)
write.csv(responsive.plot,"results/EHBxAHB_responsive_PSGE.csv",row.names=F)

Plot data for each phenotype

Plotting functions

Collapse SNPs to calculate p1 & p2 by transcript [PSGE.collapse.avgExp]

# Plot average by transcript
PSGE.collapse.avgExp <- function(data.plot,data.counts,metadata,phenotype){
  data.counts$SNP_gene <- row.names(data.counts)
  data <- data.plot %>% 
      left_join(data.counts, by = c('SNP_gene' = 'SNP_gene')) 
  genelist <- unique(data$gene)
  p1.mean <- list()
  p2.mean <- list()
  biaslist <- list()
  for(i in 1:length(genelist)){
    tmp <- data[data$gene==genelist[i],]
    if(!any(tmp$bias=="NA") & length(tmp[!tmp$bias.plot=="NA","p1"])>0){
      tmp.sub <- tmp[!tmp$bias.plot=="NA",]
       # Split count matrices by cross and parent of origin for plotting
      p1.pat <- tmp.sub[,metadata[metadata$parent%in%c("D")&
                                  metadata$lineage=="B"&
                                  metadata$phenotype==phenotype,"sample.id"]]
      p1.mat <- tmp.sub[,metadata[metadata$parent%in%c("Q")&
                                 metadata$lineage=="B"&
                                 metadata$phenotype==phenotype,"sample.id"]]
      p2.pat <- tmp.sub[,metadata[metadata$parent%in%c("D")&
                                  metadata$lineage=="A"&
                                  metadata$phenotype==phenotype,"sample.id"]]
      p2.mat <- tmp.sub[,metadata[metadata$parent%in%c("Q")&
                                  metadata$lineage=="A"&
                                  metadata$phenotype==phenotype,"sample.id"]]
      p1.mean.x <- mean(sum(p1.pat)/(sum(p1.mat)+sum(p1.pat)))
      if(is.nan(p1.mean.x)){p1.mean.x <- 0}
      if(is.infinite(p1.mean.x)){p1.mean.x <- 1}
      p1.mean[i] <- p1.mean.x
      p2.mean.x <- mean(sum(p2.mat)/(sum(p2.mat)+sum(p2.pat)))
      if(is.nan(p2.mean.x)){p2.mean.x <- 0}
      if(is.infinite(p2.mean.x)){p2.mean.x <- 1}
      p2.mean[i] <- p2.mean.x
      biaslist[i] <- as.character(tmp.sub$bias.plot[1])
    }else{
      p1.mean[i] <- mean(tmp$p1)
      p2.mean[i] <- mean(tmp$p2)
      biaslist[i] <- "NA"}}
  return.data <- data.frame(gene=unlist(genelist),
                            bias.plot=unlist(biaslist),
                            p1=unlist(p1.mean),
                            p2=unlist(p2.mean))
  return.data$bias.plot <- factor(return.data$bias.plot,
                                  levels = c("NA","mat", "AHB", "EHB", "pat"))
  return.data <- return.data[order(return.data$bias.plot),]
  return(return.data)
}

PSGE plot by transcript [PSGE.plot.tx]

PSGE.plot.tx <- function(data,title){
  g <- ggplot(data, aes(x=p1, y=p2,
                        color=bias.plot,alpha=0.5)) + 
    geom_point(size=3) + theme_classic() +
    xlab(expression(paste("% A allele in ",E[mother],
                          " x ",A[father],sep=""))) +
    ylab(expression(paste("% A allele in ",A[mother],
                          " x ",E[father],sep=""))) +
    ggtitle(title) +
    theme(text = element_text(size=18),
          plot.title = element_text(hjust = 0.5)) +
    scale_color_manual(breaks = levels(data$bias.plot),
                       values=c("grey90","#000000","#009e73","#e69f00",
                                "#56b4e9")) +
    guides(alpha=F, color=F) +
    scale_x_continuous(limits = c(0, 1), breaks = seq(0, 1, .2)) +
    scale_y_continuous(limits = c(0, 1), breaks = seq(0, 1, .2))
  return(g)
}

Make center tri-plot [triplot.plot]

triplot.plot <- function(unresponsive.plot,responsive.plot,
                         label.A,label.B,allgenes){
  gmid.df <- data.frame(Unresponsive=c(length(
    unique(unresponsive.plot[unresponsive.plot$bias.plot=="mat","gene"])),
                                       length(unique(unresponsive.plot[unresponsive.plot$bias.plot=="AHB","gene"])),
                                       length(unique(unresponsive.plot[unresponsive.plot$bias.plot=="EHB","gene"])),
                                       length(unique(unresponsive.plot[unresponsive.plot$bias.plot=="pat","gene"]))),
                        Bias=c("mat","AHB","EHB","pat"),
                        Responsive=c(length(unique(
                          responsive.plot[responsive.plot$
                                            bias.plot=="mat","gene"])),
                                     length(unique(responsive.plot[responsive.plot$bias.plot=="AHB","gene"])),
                                     length(unique(responsive.plot[responsive.plot$bias.plot=="EHB","gene"])),
                                     length(unique(responsive.plot[responsive.plot$bias.plot=="pat","gene"]))))
  
  ## Test if # of unresponsive biased genes is different from responsive biased genes
  mat.test <- chisq.test(data.frame(Success=c(gmid.df[1,1],gmid.df[1,3]),
                                    Failure=c(length(allgenes)-gmid.df[1,1],
                                              length(allgenes)-gmid.df[1,3]),
                                    row.names=c("unresponsive",
                                                "responsive")))$p.value
  AHB.test <- chisq.test(data.frame(Success=c(gmid.df[2,1],gmid.df[2,3]),
                                    Failure=c(length(allgenes)-gmid.df[2,1],
                                              length(allgenes)-gmid.df[2,3]),
                                    row.names=c("unresponsive",
                                                "responsive")))$p.value
  EHB.test <- chisq.test(data.frame(Success=c(gmid.df[3,1],gmid.df[3,3]),
                                    Failure=c(length(allgenes)-gmid.df[3,1],
                                              length(allgenes)-gmid.df[3,3]),
                                    row.names=c("unresponsive",
                                                "responsive")))$p.value
  pat.test <- chisq.test(data.frame(Success=c(gmid.df[4,1],gmid.df[4,3]),
                                    Failure=c(length(allgenes)-gmid.df[4,1],
                                              length(allgenes)-gmid.df[4,3]),
                                    row.names=c("unresponsive",
                                                "responsive")))$p.value
  ## Build table
  gmid.df$`.` <- c(mat.test,AHB.test,EHB.test,pat.test)
  gmid.df <- gmid.df[,c(4,1,2,3)]
  nsrows <- row.names(gmid.df[gmid.df$`.`>0.05,])
  gmid.df$`.` <- formatC(gmid.df$`.`, format = "e", digits = 2)
  gmid.df[nsrows,"."] <- "(ns)"
  gmid.df <- gmid.df[,c(2,3,4,1)]
  cols <- matrix("black", nrow(gmid.df), ncol(gmid.df))
  cols[1,2] <- "#000000"
  cols[2,2] <- "#009e73"
  cols[3,2] <- "#e69f00"
  cols[4,2] <- "#56b4e9"
  ccols <- matrix("white", nrow(gmid.df), ncol(gmid.df))
  ccols[1,3] <- "#f4efea"
  ccols[2,3] <- "#f4efea"
  ccols[3,3] <- "#f4efea"
  ccols[4,3] <- "#f4efea"
  ccols[1,1] <- "#f4efea"
  ccols[2,1] <- "#f4efea"
  ccols[3,1] <- "#f4efea"
  ccols[4,1] <- "#f4efea"
  ccols[1,2] <- "#e4d8d1"
  ccols[2,2] <- "#e4d8d1"
  ccols[3,2] <- "#e4d8d1"
  ccols[4,2] <- "#e4d8d1"
  cfonts <- matrix("plain", nrow(gmid.df), ncol(gmid.df))
  cfonts[1,2] <- "bold"
  cfonts[2,2] <- "bold"
  cfonts[3,2] <- "bold"
  cfonts[4,2] <- "bold"
  names(gmid.df) <- c(label.A,"Bias",label.B,".")
  gmid.df[2,2] <- "AHB"
  gmid.df[3,2] <- "EHB"
  tt <- ttheme_default(core=list(fg_params = list(col = cols, 
                                                  cex = 1,
                                                  fontface = cfonts),
                                 bg_params = list(col=NA,
                                                  fill = ccols),
                                 padding.h=unit(2, "mm")),
                       rowhead=list(bg_params = list(col=NA)),
                       colhead=list(bg_params = list(fill = c("#f4efea",
                                                              "#e4d8d1",
                                                              "#f4efea",
                                                              "white")),
                                    fg_params = list(rot=90,
                                                     cex = 1,
                                                     col = c("black",
                                                             "black",
                                                             "black",
                                                             "white"))))
  
  gmid <- tableGrob(gmid.df, rows = NULL, theme=tt)
  return(gmid)
}

Calculate allelic expression distance ratios [PSGE.collapse.hourglass]

PSGE.collapse.hourglass <- function(data.plot,data.counts,group,metadata){
  data.counts$SNP_gene <- row.names(data.counts)
  data <- data.plot %>% 
      left_join(data.counts, by = c('SNP_gene' = 'SNP_gene')) 
  data$bias.plot <- as.character(data$bias.plot)
  data <- data[!data$bias.plot=="NA",]
  genelist <- unique(data$gene)
  D <- list()
  biaslist <- list()
  for(i in 1:length(genelist)){
    tmp <- data[data$gene==genelist[i],]
    p1.pat <- tmp[,metadata[metadata$parent%in%c("D")&
                            metadata$lineage=="B"&
                            metadata$phenotype==group,"sample.id"]]
    p1.mat <- tmp[,metadata[metadata$parent%in%c("Q")&
                            metadata$lineage=="B"&
                            metadata$phenotype==group,"sample.id"]]
    p2.pat <- tmp[,metadata[metadata$parent%in%c("D")&
                            metadata$lineage=="A"&
                            metadata$phenotype==group,"sample.id"]]
    p2.mat <- tmp[,metadata[metadata$parent%in%c("Q")&
                            metadata$lineage=="A"&
                            metadata$phenotype==group,"sample.id"]]
    p1.mean <- mean(sum(p1.pat)/(sum(p1.mat)+sum(p1.pat)))
    if(is.nan(p1.mean)){p1.mean <- 0}
    if(is.infinite(p1.mean)){p1.mean <- 1}
    p2.mean <- mean(sum(p2.mat)/(sum(p2.mat)+sum(p2.pat)))
    if(is.nan(p2.mean)){p2.mean <- 0}
    if(is.infinite(p2.mean)){p2.mean <- 1}
    if(any(tmp$bias.plot%in%c("mat","pat"))){
      Da <- sqrt(((p1.mean-0)^2)+((p2.mean-1)^2))
      Db <- sqrt(((p1.mean-1)^2)+((p2.mean-0)^2))
      D[i] <- Da/(Da+Db)
    }
    if(any(tmp$bias.plot%in%c("EHB","AHB"))){
      Da <- sqrt(((p1.mean-0)^2)+((p2.mean-0)^2))
      Db <- sqrt(((p1.mean-1)^2)+((p2.mean-1)^2))
      D[i] <- Da/(Da+Db)
    }
    bias <- tmp$bias.plot[1]
    biaslist <- append(biaslist,bias)
  }
  res <- data.frame(gene=unlist(genelist),
                    bias=unlist(biaslist),
                    D=unlist(D),
                    group=group)
  return(res)
}

Plot allelic expression distance ratios [PSGE.plot.hourglass]

PSGE.plot.hourglass <- function(data,title){
  data$group <- as.factor(data$group)
  data$bias <- factor(data$bias,
                      levels = c("EHB", "AHB", "pat", "mat"))
  data$category <- NA
  data[data$bias%in%c("EHB","AHB"),"category"] <- "lineage"
  data[data$bias%in%c("mat","pat"),"category"] <- "parent"
  groups <- unique(as.character(data$group))
  labels <- c(length(data[data$group==groups[1]&data$bias=="mat","D"]),
              length(data[data$group==groups[2]&data$bias=="mat","D"]),
              length(data[data$group==groups[1]&data$bias=="pat","D"]),
              length(data[data$group==groups[2]&data$bias=="pat","D"]),
              length(data[data$group==groups[1]&data$bias=="EHB","D"]),
              length(data[data$group==groups[2]&data$bias=="EHB","D"]),
              length(data[data$group==groups[1]&data$bias=="AHB","D"]),
              length(data[data$group==groups[2]&data$bias=="AHB","D"]))
  
  g.main.parent <- ggplot(data[data$category=="parent",], 
                          aes(x = category, y = D, fill = group)) + 
    geom_violin(width = .25, alpha = 0.5) +
    geom_boxplot(width= 0.1, color="black",
                 alpha=0, outlier.shape = NA,
                 position = position_dodge(width=0.25)) +
    geom_jitter(position = position_jitterdodge(jitter.width=0.05,
                                                dodge.width=0.25),alpha=0.6) +
    theme_classic() + xlab("") + ylab("") + ggtitle(title) +
    theme(text = element_text(size=16),
          plot.title = element_blank(),
          legend.position = "top",
          legend.title = element_blank(),
          axis.line.x = element_line(arrow=grid::arrow(length=unit(0.3, "cm"),
                                                       ends = "both")),
          axis.line.y = element_blank(),
          axis.text.y=element_blank(),
          axis.ticks.y=element_blank()) + 
    scale_fill_grey(breaks=rev(levels(data$group))) +
    scale_y_continuous(breaks=seq(0, 1,.25),
                       expand=c(0.1,0.1)) +
    scale_x_discrete(expand = c(0,0)) +
    ylab("Maternal bias        Paternal bias") +
    annotation_custom(grobTree(textGrob(labels[1], x=0.1,  y=0.75))) +
    annotation_custom(grobTree(textGrob(labels[3], x=0.9,  y=0.75))) +
    annotation_custom(grobTree(textGrob(labels[2], x=0.1,  y=0.25))) +
    annotation_custom(grobTree(textGrob(labels[4], x=0.9,  y=0.25))) +
    coord_flip()
  
  check.filled <- data[data$category=="lineage",c("bias","group")]
  check <- F
  for(i in 1:length(groups)){
    if(length(check.filled[check.filled$group==groups[i],"bias"])<2){
      check <- T}}
  
  g.main.lineage <- ggplot(data[data$category=="lineage",], 
                           aes(x = category, y = D, fill = group))
  if(check==F){g.main.lineage <- g.main.lineage + 
    geom_violin(width = .25, alpha = 0.5)}
  
  g.main.lineage <- g.main.lineage +
    geom_boxplot(width= 0.1, color="black", 
                 alpha=0, outlier.shape = NA,
                 position = position_dodge(width=0.25)) +
    geom_jitter(position = position_jitterdodge(jitter.width=0.05,
                                                dodge.width=0.25),alpha=0.6) +
    theme_classic() + xlab("") + ylab("") + ggtitle(title) +
    theme(text = element_text(size=16),
          plot.title = element_blank(),
          legend.position = "top",
          legend.title = element_blank(),
          axis.line.x = element_line(arrow=grid::arrow(length=unit(0.3, "cm"),
                                                       ends = "both")),
          axis.line.y = element_blank(),
          axis.text.y=element_blank(),
          axis.ticks.y=element_blank()) + 
    scale_fill_grey(breaks=rev(levels(data$group))) +
    scale_y_continuous(breaks=seq(0, 1,.25),
                       expand=c(0.1,0.1)) +
    scale_x_discrete(expand = c(0,0)) +
    ylab("EHB bias                   AHB bias") +
    annotation_custom(grobTree(textGrob(labels[5], x=0.1,  y=0.75))) +
    annotation_custom(grobTree(textGrob(labels[7], x=0.9,  y=0.75))) +
    annotation_custom(grobTree(textGrob(labels[6], x=0.1,  y=0.25))) +
    annotation_custom(grobTree(textGrob(labels[8], x=0.9,  y=0.25))) +
    coord_flip()
  
  g.main <- ggarrange(g.main.parent, g.main.lineage, ncol=2, nrow=1, 
                      common.legend = TRUE, legend="top")
  
  g <- annotate_figure(g.main, bottom=text_grob(
    "Allelic expression distance ratio", size=18), 
    top=text_grob(title, size=20))

  return(g)
}

Plot PSGE by transcript

unresponsive.collapse <- PSGE.collapse.avgExp(unresponsive.plot,
                                              unresponsive_counts,
                                              metadata,"unresponsive")
g1.avgExp <- PSGE.plot.tx(unresponsive.collapse,"Non-aggressive workers")
## Warning: The `<scale>` argument of `guides()` cannot be `FALSE`. Use "none" instead as
## of ggplot2 3.3.4.
g1.avgExp

responsive.collapse <- PSGE.collapse.avgExp(responsive.plot,responsive_counts,
                                            metadata,"responsive")
g2.avgExp <- PSGE.plot.tx(responsive.collapse,"Aggressive workers")
g2.avgExp

allgenes <- read.table("src/Amel_HAv3.1_genes.bed",header=F)[,c(4)]
triplot.avgExp <- triplot.plot(unresponsive.collapse,responsive.collapse,
                               "Unresponsive","Responsive",allgenes)
## Warning in chisq.test(data.frame(Success = c(gmid.df[2, 1], gmid.df[2, 3]), :
## Chi-squared approximation may be incorrect
## Warning in chisq.test(data.frame(Success = c(gmid.df[3, 1], gmid.df[3, 3]), :
## Chi-squared approximation may be incorrect
plot(triplot.avgExp)

fig1 <- arrangeGrob(g1.avgExp, triplot.avgExp, g2.avgExp, widths=c(5,2.5,5))
ggsave(file="results/AHBxEHB.png", plot=fig1, width=15, height=6)

Plot allelic expression distance ratios of PSGEs by transcripts

unresponsive.collapse.hourglass <- PSGE.collapse.hourglass(unresponsive.plot,
                                                           unresponsive_counts,
                                                           "unresponsive",
                                                           metadata)
responsive.collapse.hourglass <- PSGE.collapse.hourglass(responsive.plot,
                                                         responsive_counts,
                                                         "responsive",
                                                         metadata)
data.hourglass <- rbind(unresponsive.collapse.hourglass,
                        responsive.collapse.hourglass)

g1.hourglass <- PSGE.plot.hourglass(data.hourglass,"EHBxAHB")
plot(g1.hourglass)

ggsave(file="results/EHBxAHB_hourglass.png", plot=g1.hourglass, 
       width=10, height=4, bg="white")

Parent-of-origin allele-specific RNA m6A

Preprocess count matrices

Split RNA m6A probability matrix by phenotype

Subset SNP_counts by unresponsive and responsive phenotypes

unresponsive.IDs <- metadata[metadata$phenotype=="unresponsive","sample.id"]
responsive.IDs <- metadata[metadata$phenotype=="responsive","sample.id"]

unresponsive_m6A <- SNP_m6A[,names(SNP_m6A)%in%unresponsive.IDs]
responsive_m6A <- SNP_m6A[,names(SNP_m6A)%in%responsive.IDs]

Filter low-count SNPs (match row names with count matrices)

unresponsive_m6A <- unresponsive_m6A[row.names(unresponsive_m6A)%in%
                                       row.names(unresponsive_counts),]
responsive_m6A <- responsive_m6A[row.names(responsive_m6A)%in%
                                   row.names(responsive_counts),]

Split count matrices by parent of origin for testing

unresponsive.pat.m6A <- unresponsive_m6A[,metadata[metadata$parent%in%c("D")&
                                         metadata$phenotype=="unresponsive",
                                         "sample.id"]]

unresponsive.pat <- unresponsive_counts[,metadata[metadata$parent%in%c("D")&
                                         metadata$phenotype=="unresponsive",
                                         "sample.id"]]

unresponsive.mat.m6A <- unresponsive_m6A[,metadata[metadata$parent%in%c("Q")&
                                         metadata$phenotype=="unresponsive",
                                         "sample.id"]]

unresponsive.mat <- unresponsive_counts[,metadata[metadata$parent%in%c("Q")&
                                         metadata$phenotype=="unresponsive",
                                         "sample.id"]]

responsive.pat.m6A <- responsive_m6A[,metadata[metadata$parent%in%c("D")&
                                     metadata$phenotype=="responsive",
                                     "sample.id"]]

responsive.pat <- responsive_counts[,metadata[metadata$parent%in%c("D")&
                                     metadata$phenotype=="responsive",
                                     "sample.id"]]

responsive.mat.m6A <- responsive_m6A[,metadata[metadata$parent%in%c("Q")&
                                     metadata$phenotype=="responsive",
                                     "sample.id"]]

responsive.mat <- responsive_counts[,metadata[metadata$parent%in%c("Q")&
                                     metadata$phenotype=="responsive",
                                     "sample.id"]]

Conduct two-tailed unpooled Z-test at each parent SNP

ztest.df <- function(pat.m6A,pat.exp,
                     mat.m6A,mat.exp){
  i.len <- length(row.names(pat.exp))
  return.list <- list()
  for(i in 1:i.len){
    SNP_gene <- row.names(pat.exp[i,])
    pm <- mean(as.numeric(pat.m6A[i,]))
    pe <- mean(as.numeric(pat.exp[i,]))
    mm <- mean(as.numeric(mat.m6A[i,]))
    me <- mean(as.numeric(mat.exp[i,]))
    sigmaHatD <- sqrt(((pm*(1-pm))/pe)+((mm*(1-mm))/me))
    z <- abs((pm-mm)/sigmaHatD)
    test <- 2*pnorm(q=z, lower.tail=F)
    return.df <- data.frame(SNP_gene=SNP_gene,p=test)
    return.list[[i]] <- return.df
  }
  return(bind_rows(return.list))
}
unresponsive.Z <- ztest.df(unresponsive.pat.m6A,
                           unresponsive.pat,
                           unresponsive.mat.m6A,
                           unresponsive.mat)
unresponsive.Z[is.na(unresponsive.Z)] <- 1
write.csv(unresponsive.Z,"results/EHBxAHB_unresponsiveZ.csv", row.names=F)

responsive.Z <- ztest.df(responsive.pat.m6A,
                         responsive.pat,
                         responsive.mat.m6A,
                         responsive.mat)
responsive.Z[is.na(responsive.Z)] <- 1
write.csv(responsive.Z,"results/EHBxAHB_responsiveZ.csv", row.names=F)

Assess test results for each gene

PSm6A.analysis <- function(counts,phenotype,metadata,Z){
  # Split count matrices by cross and parent of origin for plotting
  p1.pat <- counts[,metadata[metadata$parent%in%c("D")&
                               metadata$lineage=="B"&
                               metadata$phenotype==phenotype,"sample.id"]]
  p1.mat <- counts[,metadata[metadata$parent%in%c("Q")&
                               metadata$lineage=="B"&
                               metadata$phenotype==phenotype,"sample.id"]]
  p2.pat <- counts[,metadata[metadata$parent%in%c("D")&
                               metadata$lineage=="A"&
                               metadata$phenotype==phenotype,"sample.id"]]
  p2.mat <- counts[,metadata[metadata$parent%in%c("Q")&
                               metadata$lineage=="A"&
                               metadata$phenotype==phenotype,"sample.id"]]
  # Set up a data.frame to plot %p1 and %p2 for each SNP
  p1.plot <- data.frame(rowSums(p1.pat)/(rowSums(p1.mat)+rowSums(p1.pat)))
  names(p1.plot) <- c("p1")
  p1.plot[is.nan(p1.plot$p1),"p1"] <- 0
  p2.plot <- data.frame(rowSums(p2.mat)/(rowSums(p2.mat)+rowSums(p2.pat)))
  names(p2.plot) <- c("p2")
  p2.plot[is.nan(p2.plot$p2),"p2"] <- 0
  plot <- cbind(p1.plot,p2.plot)
  # Join results of Storer-Kim tests
  plot <- plot[row.names(plot)%in%Z$SNP_gene,]
  plot$Z.p <- Z$p
  plot$SNP_gene <- row.names(plot)
  plot$gene <- as.character(map(strsplit(plot$SNP_gene, split = ":"), 2))
  # Correct for multiple testing
  plot$Z.padj <- p.adjust(plot$Z.p,"BH")
  plot$bias <- "NA"
  # For each gene, check whether all SNPs are biased in the same direction
  for(i in 1:length(row.names(plot))){
    p <- plot[i,"Z.padj"]
    p1 <- plot[i,"p1"]
    p2 <- plot[i,"p2"]
    if(p<0.05&p1>0.6&p2<0.4){plot[i,"bias"] <- "pat"}
    if(p<0.05&p1<0.4&p2>0.6){plot[i,"bias"] <- "mat"}
    if(p<0.05&p1<0.4&p2<0.4){plot[i,"bias"] <- "EHB"}
    if(p<0.05&p1>0.6&p2>0.6){plot[i,"bias"] <- "AHB"}
  }
  biaslist <- data.frame(matrix(ncol=2,nrow=0))
  names(biaslist) <- c("gene","bias")
  genelist <- unique(plot$gene)
  for(i in 1:length(genelist)){
    tmp <- unique(plot[plot$gene==genelist[i],"bias"])
    if(length(tmp)>1){
      if(length(tmp)==2){
        if(any(tmp%in%"NA")){
          bias <- tmp[!tmp%in%"NA"]
        }else{bias <- "NA"}
      }else{
        bias <- "NA"
      }
    }else{bias <- tmp}
    biaslist <- rbind(biaslist,data.frame(gene=genelist[[i]], bias=bias))
  }
  plot <- plot %>% left_join(biaslist, by = c('gene' = 'gene')) 
  names(plot)[c(7:8)] <- c("xbias","bias")
  plot$bias.plot <- "NA"
  for(i in 1:length(row.names(plot))){
    p1 <- plot$p1[i]
    p2 <- plot$p2[i]
    bias <- plot$bias[i]
    if(!bias=="NA"){
      if(bias=="pat"){if(p1>0.6&p2<0.4){plot[i,"bias.plot"]<- "pat"}}
      if(bias=="mat"){if(p1<0.4&p2>0.6){plot[i,"bias.plot"] <- "mat"}}
      if(bias=="EHB"){if(p1<0.4&p2<0.4){plot[i,"bias.plot"] <- "EHB"}}
      if(bias=="AHB"){if(p1>0.6&p2>0.6){plot[i,"bias.plot"] <- "AHB"}}
    }
  }
  plot <- rbind(plot[plot$bias.plot%in%c("NA"),],
                plot[plot$bias.plot%in%c("mat", "AHB", "EHB", "pat"),])
  plot$bias.plot <- factor(plot$bias.plot,
                           levels = c("NA","mat", "AHB", "EHB", "pat"))
  return(plot)
}
unresponsive.plot <- PSm6A.analysis(unresponsive_m6A,"unresponsive",metadata,
                                    unresponsive.Z)
write.csv(unresponsive.plot,"results/EHBxAHB_unresponsive_PSm6A.csv",
          row.names=F)

responsive.plot <- PSm6A.analysis(responsive_m6A,"responsive",metadata,
                                 responsive.Z)
write.csv(responsive.plot,"results/EHBxAHB_responsive_PSm6A.csv",row.names=F)

Plot data for each phenotype

Plot PSm6A by transcript

unresponsive.collapse <- PSGE.collapse.avgExp(unresponsive.plot,
                                              unresponsive_m6A,
                                              metadata,"unresponsive")
g1.avgExp <- PSGE.plot.tx(unresponsive.collapse,"Non-aggressive workers")
g1.avgExp

responsive.collapse <- PSGE.collapse.avgExp(responsive.plot,
                                            responsive_m6A,
                                            metadata,"responsive")
g2.avgExp <- PSGE.plot.tx(responsive.collapse,"Aggressive workers")
g2.avgExp

allgenes <- read.table("src/Amel_HAv3.1_genes.bed",header=F)[,c(4)]
triplot.avgExp <- triplot.plot(unresponsive.collapse,responsive.collapse,
                               "Unresponsive","Responsive",allgenes)
## Warning in chisq.test(data.frame(Success = c(gmid.df[3, 1], gmid.df[3, 3]), :
## Chi-squared approximation may be incorrect
plot(triplot.avgExp)

fig2 <- arrangeGrob(g1.avgExp, triplot.avgExp, g2.avgExp, widths=c(5,2.5,5))
ggsave(file="results/AHBxEHB_m6A.png", plot=fig2, width=15, height=6)

Plot allelic expression distance ratios of PSm6A transcripts

unresponsive.collapse.hourglass <- PSGE.collapse.hourglass(unresponsive.plot,
                                                           unresponsive_m6A,
                                                           "unresponsive",
                                                           metadata)
responsive.collapse.hourglass <- PSGE.collapse.hourglass(responsive.plot,
                                                         responsive_m6A,
                                                         "responsive",
                                                         metadata)
data.hourglass <- rbind(unresponsive.collapse.hourglass,
                        responsive.collapse.hourglass)

g1.hourglass <- PSGE.plot.hourglass(data.hourglass,"EHBxAHB")
plot(g1.hourglass)

ggsave(file="results/EHBxAHB_m6A_hourglass.png", plot=g1.hourglass, 
       width=10, height=4, bg="white")

Compare PSGE to PSm6A, and DElincRNAs & isoform switching

un.PSGE <- read.csv("results/EHBxAHB_unresponsive_PSGE.csv")
un.PSGE[is.na(un.PSGE$xbias),"xbias"] <- "NA"
un.PSGE[is.na(un.PSGE$bias),"bias"] <- "NA"
un.PSGE[is.na(un.PSGE$bias.plot),"bias.plot"] <- "NA"
un.PSGE <- rbind(un.PSGE[un.PSGE$bias.plot%in%c("NA"),],
                      un.PSGE[un.PSGE$bias.plot%in%c(
                        "mat", "AHB", "EHB", "pat"),])
un.PSGE$bias.plot <- factor(un.PSGE$bias.plot,
                                 levels = c("NA","mat", "AHB", "EHB", "pat"))

res.PSGE <- read.csv("results/EHBxAHB_responsive_PSGE.csv")
res.PSGE[is.na(res.PSGE$xbias),"xbias"] <- "NA"
res.PSGE[is.na(res.PSGE$bias),"bias"] <- "NA"
res.PSGE[is.na(res.PSGE$bias.plot),"bias.plot"] <- "NA"
res.PSGE <- rbind(res.PSGE[res.PSGE$bias.plot%in%c("NA"),],
                      res.PSGE[res.PSGE$bias.plot%in%c(
                        "mat", "AHB", "EHB", "pat"),])
res.PSGE$bias.plot <- factor(res.PSGE$bias.plot,
                                 levels = c("NA","mat", "AHB", "EHB", "pat"))

un.m6A <- read.csv("results/EHBxAHB_unresponsive_PSm6A.csv")
un.m6A[is.na(un.m6A$xbias),"xbias"] <- "NA"
un.m6A[is.na(un.m6A$bias),"bias"] <- "NA"
un.m6A[is.na(un.m6A$bias.plot),"bias.plot"] <- "NA"
un.m6A <- rbind(un.m6A[un.m6A$bias.plot%in%c("NA"),],
                      un.m6A[un.m6A$bias.plot%in%c(
                        "mat", "AHB", "EHB", "pat"),])
un.m6A$bias.plot <- factor(un.m6A$bias.plot,
                                 levels = c("NA","mat", "AHB", "EHB", "pat"))

res.m6A <- read.csv("results/EHBxAHB_responsive_PSm6A.csv")
res.m6A[is.na(res.m6A$xbias),"xbias"] <- "NA"
res.m6A[is.na(res.m6A$bias),"bias"] <- "NA"
res.m6A[is.na(res.m6A$bias.plot),"bias.plot"] <- "NA"
res.m6A <- rbind(res.m6A[res.m6A$bias.plot%in%c("NA"),],
                      res.m6A[res.m6A$bias.plot%in%c(
                        "mat", "AHB", "EHB", "pat"),])
res.m6A$bias.plot <- factor(res.m6A$bias.plot,
                                 levels = c("NA","mat", "AHB", "EHB", "pat"))

un.matBias <- unique(un.PSGE[un.PSGE$bias.plot=="mat","gene"])
res.matBias <- unique(res.PSGE[res.PSGE$bias.plot=="mat","gene"])
un.patBias <- unique(un.PSGE[un.PSGE$bias.plot=="pat","gene"])
res.patBias <- unique(res.PSGE[res.PSGE$bias.plot=="pat","gene"])

un.m6A.matBias <- unique(un.m6A[un.m6A$bias.plot=="mat","gene"])
res.m6A.matBias <- unique(res.m6A[res.m6A$bias.plot=="mat","gene"])
un.m6A.patBias <- unique(un.m6A[un.m6A$bias.plot=="pat","gene"])
res.m6A.patBias <- unique(res.m6A[res.m6A$bias.plot=="pat","gene"])

un.PSGE.list <- c(un.matBias,un.patBias)
un.m6A.list <- c(un.m6A.matBias,un.m6A.patBias)
un.overlap.list <- intersect(un.PSGE.list,un.m6A.list)
un.overlap.list
## [1] "406088.tx_119" "726321.tx_407"
res.PSGE.list <- c(res.matBias,res.patBias)
res.m6A.list <- c(res.m6A.matBias,res.m6A.patBias)
res.overlap.list <- intersect(res.PSGE.list,res.m6A.list)
res.overlap.list
## [1] "724552.tx_2582"
SNP_pos <- read.csv("results/SNP_gene_tx_IDs.csv",row.names=1)
DElincRNAs <- read.csv("results/DElincRNAs.csv",header=F)[,1]
CK <- unique(SNP_pos[SNP_pos$Transcript%in%DElincRNAs,"Gene"])
unique(as.character(map(strsplit(CK, split = ":"), 2)))
## [1] "lincRNA_2048.tx_256"  "lincRNA_2352.tx_629"  "lincRNA_2530.tx_1445"
sigSwitches <- read.table("results/sig_switches_by_phenotype.txt",header=1)
sigSwitches$geneID <- as.character(sigSwitches$geneID)

un.overlap.list <- intersect(unique(as.character(
  map(strsplit(un.PSGE.list, split = "[.]"), 1))), 
  as.character(sigSwitches$geneID))
un.overlap.list
## character(0)
un.overlap.list <- intersect(unique(as.character(
  map(strsplit(un.m6A.list, split = "[.]"), 1))), 
  as.character(sigSwitches$geneID))
un.overlap.list
## character(0)
res.overlap.list <- intersect(unique(as.character(
  map(strsplit(res.PSGE.list, split = "[.]"), 1))), 
  as.character(sigSwitches$geneID))
res.overlap.list
## character(0)
res.overlap.list <- intersect(unique(as.character(
  map(strsplit(res.m6A.list, split = "[.]"), 1))), 
  as.character(sigSwitches$geneID))
res.overlap.list
## character(0)

Session info

sessionInfo()
## R version 4.2.2 (2022-10-31)
## Platform: aarch64-apple-darwin20 (64-bit)
## Running under: macOS Ventura 13.1
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/4.2-arm64/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.2-arm64/Resources/lib/libRlapack.dylib
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## attached base packages:
## [1] grid      parallel  stats     graphics  grDevices utils     datasets 
## [8] methods   base     
## 
## other attached packages:
##  [1] ggpubr_0.5.0       ggdist_3.2.0       gghalves_0.1.4     doSNOW_1.0.20     
##  [5] snow_0.4-4         doParallel_1.0.17  iterators_1.0.14   foreach_1.5.2     
##  [9] kableExtra_1.3.4   gridExtra_2.3      viridis_0.6.2      viridisLite_0.4.1 
## [13] car_3.1-1          carData_3.0-5      lmerTest_3.1-3     lme4_1.1-31       
## [17] Matrix_1.5-1       tryCatchLog_1.3.1  Rfast_2.0.6        RcppZiggurat_0.1.6
## [21] Rcpp_1.0.9         plyr_1.8.8         forcats_0.5.2      stringr_1.5.0     
## [25] dplyr_1.0.10       purrr_1.0.1        readr_2.1.3        tidyr_1.2.1       
## [29] tibble_3.1.8       ggplot2_3.4.0      tidyverse_1.3.2   
## 
## loaded via a namespace (and not attached):
##  [1] nlme_3.1-160         fs_1.5.2             lubridate_1.9.0     
##  [4] webshot_0.5.4        httr_1.4.4           numDeriv_2016.8-1.1 
##  [7] tools_4.2.2          backports_1.4.1      bslib_0.4.2         
## [10] utf8_1.2.2           R6_2.5.1             DBI_1.1.3           
## [13] colorspace_2.0-3     withr_2.5.0          tidyselect_1.2.0    
## [16] compiler_4.2.2       cli_3.6.0            rvest_1.0.3         
## [19] xml2_1.3.3           sass_0.4.4           scales_1.2.1        
## [22] systemfonts_1.0.4    digest_0.6.31        minqa_1.2.5         
## [25] rmarkdown_2.19       svglite_2.1.1        pkgconfig_2.0.3     
## [28] htmltools_0.5.4      highr_0.10           dbplyr_2.2.1        
## [31] fastmap_1.1.0        rlang_1.0.6          readxl_1.4.1        
## [34] rstudioapi_0.14      farver_2.1.1         jquerylib_0.1.4     
## [37] generics_0.1.3       jsonlite_1.8.4       distributional_0.3.1
## [40] googlesheets4_1.0.1  magrittr_2.0.3       munsell_0.5.0       
## [43] fansi_1.0.3          abind_1.4-5          lifecycle_1.0.3     
## [46] stringi_1.7.12       yaml_2.3.6           MASS_7.3-58.1       
## [49] crayon_1.5.2         lattice_0.20-45      cowplot_1.1.1       
## [52] haven_2.5.1          splines_4.2.2        hms_1.1.2           
## [55] knitr_1.41           pillar_1.8.1         boot_1.3-28         
## [58] ggsignif_0.6.4       codetools_0.2-18     reprex_2.0.2        
## [61] glue_1.6.2           evaluate_0.19        modelr_0.1.10       
## [64] vctrs_0.5.1          nloptr_2.0.3         tzdb_0.3.0          
## [67] cellranger_1.1.0     gtable_0.3.1         assertthat_0.2.1    
## [70] cachem_1.0.6         xfun_0.36            broom_1.0.2         
## [73] rstatix_0.7.1        googledrive_2.0.0    gargle_1.2.1        
## [76] timechange_0.2.0     ellipsis_0.3.2